% Matlab code to estimate a mixed logit model with maximum simulated likelihood
% Written by Kenneth Train, first version July 19, 2006, 
%   latest edits August 9, 2006
%
% Adapted by Patrick Bigler (patrick.bigler@kpm.unibe.ch) to estimate the
% models in question, but also to allow for the estimation of a
% bootstrapped standard error.

clear all

% Declare GLOBAL variables
% GLOBAL variables are all in caps
% DO NOT CHANGE ANY OF THESE 'global' STATEMENTS
global NP NCS NROWS
global IDV NV NAMES B W
global IDF NF NAMESF F
global DRAWTYPE NDRAWS SEED1 SAVEDR PUTDR
global WANTWGT IDWGT WGT
global NALTMAX NCSMAX
global X XF S DR
global XMAT
global NMEM NTAKES NPARAM
global MDR
global NReps
global NS NP_tot

% OUTPUT FILE
% Put the name you want for your output file (including full path if not the current 
% working directory) after words "delete" and "diary".
% The 'diary off' and 'delete filename' commands close and delete the previous version 
% of the file created during your current matlab session or in any previous sessions. 
% If you want to append the new output to the old output, then 
% put % in front of the 'diary off' and 'delete filename' commands (or erase them).

diary off
delete myrun.out
diary myrun.out

% TITLE
% Put a title for the run in the quotes below, to be printed at the top of the output file.
disp 'Mixed logit with 250 MLHS draws: Car selection in Bern - Model Halton SS'

% DATA

% Number of people (decision-makers) in dataset 
NP=23074;        

% Number of choice situations in dataset. This is the number faced by all the people combined.
NCS=23074;     

% Total number of alternatives faced by all people in all choice situations combined.
% This is the number of rows of data in XMAT below.
NROWS=9816000;


% Load and/or create XMAT, a matrix that contains the data.
%
% XMAT must contain one row of data for each alternative in each choice situation for each person.
% The rows are grouped by person, and by choice situations faced by each person.
% The number of rows in XMAT must be NROWS, specified above.
% The columns in XMAT are variable that describe the alternative.
% 
% The *first* column of XMAT identifies the person who faced this alternative. 
% The people must be numbered sequentially from 1 to NP, in ascending order.
% All alternatives for a given person must be grouped together.
% The *second* column of XMAT identifies the choice situation. The choice
% situations must be numbered sequentially from 1 to NCS.
% All alternatives for a given choice situation must be grouped together.
% The *third* column of XMAT identifies the chosen alternatives (1 for
% chosen, 0 for not). One and only one alternative must be chosen for each
% choice situation.
% The remaining columns of XMAT can be any variables.

XMAT=load('EV_project09.csv');  %The variables are described below

% To help you keep up with the variables, list the variables in XMAT here.
% Start each line with % so that matlab sees that it is a comment rather than a command.
% NOTES for XMAT for sample run:
% This dataset is for people's choice among vehicles in Swiss Canton of
% Bern
% Each person faced up to 600 different options.
% The variables in XMAT are:
% 1. Person number (1-NP)            MUST BE THIS. DO NOT CHANGE.
% 2. Choice situation number (1-NCS) MUST BE THIS. DO NOT CHANGE.
% 3. Chosen alternative (1/0)        MUST BE THIS. DO NOT CHANGE.
% 4. car_option 
% 5. price (TCHF)
% 6. height 
% 7. weight
% 8. var_cost (TCHF)
% 9. engine power (KW)
% 10. EV 
% 11. hybrid
% 12. diesel
% 13. car_size 
% 14. - 17. car_size # hhsize
% 18. - 19. KW # age
% 20. ev_urban2
% 21. ev_urban3
% 22. ev_dist_ev
% 23. ev_range5
% 24. ev_year2
% 25. ev_year3
% 26. ev_pv
% 27. ev_ho
% 28. - 30. ev # wealth group
% 31. - 33. price # wealth_group
% 34. - 48. brand-dummy
% 49. - 55. cat-dummy
% 56. env_friendly
% 57. resid
% 58. resid_dist_ind
% 59. resid_distonly
% 60. resid_low
% 61. resid_high
% 62. resid_short
% 63. resid_midshort
% 64. resid_long
% 65. resid_midlong
% 66. resid_variable
% 67. var_cost_variable
% 68. var_cost_low
% 69. var_cost_high
% 70. var_cost_long
% 71. var_cost_midlong
% 72. var_cost_short
% 73. var_cost_midshort

% MODEL SPECIFICATION

% RANDOM COEFFICIENTS
% List the variables in XMAT that enter the model with random coefficients and
% give the distribution for the coefficient of each variable.
% IDV contains one row for each random coefficient and two columns.
% The *first* column gives the number of a variable in XMAT that has a random coefficient, 
% and the *second* column specifies the distribution of the coefficient for that variable.
% The distributions can be 
% 1. normal: N(b,w^2) where mean b and standard deviation w are estimated.
% 2. lognormal: coefficient is exp(beta) where beta~N(b,w^2) with b and w estimated
% 3. truncated normal, with the share below zero massed at zero: max(0,beta) where 
%                      beta~N(b,w^2) with b and w estimated.
% 4. S_B: exp(beta)/(1+exp(beta))  where beta~N(b,w^2) with b and w estimated.
% 5. normal with zero mean (for error components): N(0,w^2) where w is estimated.
% 6. triangular: b+w*t where t is triangular between -1 and 1 and mean b and spread w are estimated.
% If no random coefficients, put IDV=[];
% Notes:
% The lognormal, truncated normal, and S_B distributions give positive
% coefficients only. If you want a variable to have only negative coefficients, 
% create the negative of the variable (in the specification of XMAT above).
% The S_B distribution gives coefficients between 0 and 1. If you want
% coefficients to be between 0 and k, then multiply the variable by k (in the specification 
% of XMAT above), since b*k*x for b~(0-1) is the same as b*x for b~(0-k).
% If no random coefficients, put IDV=[];
 
IDV=[ 5 1;   ...
     6 1;   ...
     7 1;   ...
      8 1 ];

NV=size(IDV,1); %Number of random coefficients. Do not change this line.

% Give a name to each of the explanatory variables in IDV. They can 
% have up to ten characters including spaces. Put the names in single quotes and separate 
% the quotes with semicolons. If IDV=[], then set NAMES=[];
NAMES={'price';'height';'weight';'var_cost'}; 

% Starting values
% Specify the starting values for b and w for each random coeffient.
% B contains the first parameter, b, for each random coefficient.  
% It is a column vector with the same length as IDV. For distribution 5 (normal with zero mean),
% put 0 for the starting value for the mean. The code will keep it at 0.
% W contains the second parameter, w, for each random coefficient.
% It is a column vector with the same length as IDV.
% Put semicolons between the elements of B and W (so they will be column vectors).

B=[-0.0887; 0.59; -0.00003; -0.04378 ];
W=[0.01;0.001;0.0001;0.001 ];


% FIXED COEFFICIENTS
% List the variables in XMAT that enter with fixed coefficients.
% Put semicolons between the numbers.
% If no fixed coefficients, put IDF=[];

IDF=[9;10;11;12;13;14;15;16;17;18;19;20;21;22;23;24;25;26;27;28;29;30;31;32;33;34;35;36;37;38;39;40;41;42;43;44;45;46;47;48;49;50;51;52;53;54;55;56;57];



NF=size(IDF,1); %Number of fixed coefficients. Do not change this line.

% Give a name to each of the variables in IDF.
NAMESF={'kw';'ev';'hybrid';'diesel';'size';'size2';'size3';'size4';'size5';'kw2';'kw3';'ev_urb2';'ev_urb3';'ev_dist';'ev_range5';'ev2018';'ev2019';'ev_pv';'ev_ho';'ev_w2';'ev_w3';'ev_w4';'price_w2';'price_w3';'price_w4';'brand2';'brand3';'brand4';'brand5';'brand6';'brand7';'brand8';'brand9';'brand10';'brand11';'brand12';'brand13';'brand14';'brand15';'brand16';'type2';'type3';'type4';'type5';'type6';'type7';'type8';'env_friendly';'resid'};


% Starting values.
% Specify the starting values for the fixed coefficients F.
% F must have the same length as IDF and have one column.
% Put semicolons between the elements (so F will be a column vector.)
F=[0.017;-2.456;0.0572;-0.497;0.025;0.153;0.346;0.554;0.756;-0.0052;-0.0101;0.131;-0.0944;
    -0.0261;0.007;0.104;1.533;2.242;0.398;0.866;0.693;1.459;0.0033;0.0056;0.0233;0.378;
    1.056;0.281;0.236;-0.0011;0.0844;0.174;0.789;0.2569;0.514;0.118;
    0.572;0.903;0.395;1.137;0.461;-0.566;-0.012;-0.025;-0.169;0.366;-0.063;0.052;0.05];

% Type of draws to use in simulation
% 1=pseudo-random draws
% 2=standard Halton draws
% 3=shifted and shuffled Halton draws
% 4=modified Latin hypercube sampling, shifted and shuffled 
% 5=create your own draws or load draws from file
DRAWTYPE=3;

% Number of draws from to use per person in simulation.
NDRAWS=150;

% Set seed for the random number generator.
SEED1 = 14239; 

% If DRAWTYPE=5, then create or load the draws here.
% Create or load a data array, called DR, with dimensions NV x NP x NDRAWS.
% where element DR(i,j,k) is the k-th draw of random coefficient i for person j 
% Put your statements between "if DRAWTYPE==5" and "end".
% 
% If you created DR in a previous matlab session and saved it
% with "save mydraws DR" then put "load('mydraws.mat')" here. The structure
% mydraws will contain the array DR.
% Note: If you want to use the draws that were saved to PUTDR below in a 
% previous run, see the ReadMe.txt file for instructions.

if DRAWTYPE==5
   load('mydraws.mat');
end


% Memory use
% Give the number of draws that you want held in memory at one time.
% This number must be evenly divisible into the number of draws.
% That is NDRAWS./NMEM must be a positive integer.
% To hold all draws in memory at once, set NMEM=NDRAWS.
% A larger value of NMEM requires fewer reads from disc but 
% uses more memory which can slow-down the calculations and increases 
% the chance of running out of memory.
% If DRAWTYPE=5, then you must set NMEM=NDRAWS
NMEM=10;

% If all the draws are NOT held in memory at one time (that is, if NMEM<NDRAWS), 
% then give the filename (including full path if not in the working directory)
% that you want the draws to be temporarily saved to while the code is running.
% If all draws are held in memory at one time (that is, if NMEM=NDRAWS),
% then this file will not be created. So, if NMEM=NDRAWS, you can set PUTDR=''; 
% or give a file name, whichever you find more convenient, since the name won't be used.
PUTDR='draws';

% WEIGHTS. 
% Do you want to apply weights to the people? 
% Set WANTWGT=1 if you want to apply weights; otherwise set WANTWGT=0;
WANTWGT=0;

% If WANTWGT=1, identify the variable in XMAT that contains the weights.
% This variable can vary over people but must be the same for all rows of
% data for each person. Weights cannot vary over choice situations for
% each person or over alternatives for each choice situation -- only over people.
% The code normalizes the weights such that the sum 
% of weights over people is to equal NP (to assure that standard errors 
% are correctly calculated.) If WANTWGT=0, set IDWGT=[];
IDWGT=[];

% OPTIMIZATION 
% Maximum number of iterations for the optimization routine.
% The code will abort after ITERMAX iterations, even if convergence has
% not been achieved. The default is 400, which is used when MAXITERS=[];
MAXITERS=[1000];

% Convergence criterion based on the maximum change in parameters that is considered
% to represent convergence. If all the parameters change by less than PARAMTOL 
% from one iteration to the next, then the code considers convergence to have been
% achieved. The default is 0.000001, which is used when PARAMTOL=[];
PARAMTOL=0.000001;

% Convergence criterion based on change in the log-likelihood that is
% considered to represent convergence. If the log-likelihood value changes
% less than LLTOL from one iteration to the next, then the optimization routine
% considers convergence to have been achieved. The default is 0.000001,
% which is used when LLTOL=[];
LLTOL=[];

%Add the additional option of having bootstraped standard errors:
%Set to one if you want Bootstrap and specify the number of replications in
%next line; Specify the sample percentage that should be drawn in
%Sample_perc (from 0-1)
WantBoot=0 ; 
NReps=200;
Sample_perc=.25;

NS=round(NP.*Sample_perc,0);


%Do not change the next line. It runs the model.
doit


% This loop estimates the bootstrapped if the model is specified for
% bootstrapped standard errors.
if WantBoot==1
    NP_tot=NP
    NP=NS % Replace NP with the sample number to be able to have more bootstrap runs.
    Boot
end


% These last lines delete the file of draws that is created when NMEM<NDRAWS
% since it is no longer needed. If you want to save it, then put % in front
% of these lines.
if NMEM<NDRAWS
    clear global MDR
    delete(PUTDR)
end

%% Create a table of results: 
Names_results=[NAMESF;NAMES;NAMES]
Results=table(Names_results,paramhat,stderr)

filename = 'results_sshalton.xlsx';
writetable(Results,filename,'Sheet',1,'Range','A1')
